/*
   SAS workshop example programs 1-19.
   Each program is introduced by a header comment giving its number and the
   page it is referenced from. The programs are meant to be submitted in
   order: later steps reuse the data sets FIRST_DATA, PULSE and PULSE2 and
   the format SMOKES_LABEL created by earlier steps.
*/

*Program 1: First DATA step (p.9);
/* In-stream data must start on the line after DATALINES. One value per
   line so that INPUT y reads one observation per record. */
data first_data;
    input y;
    datalines;
2
5
9
9
10
11
;
run;

*Program 2: Printing the data file (p.15);
proc print data=first_data;
    title;   /* a bare TITLE statement clears any title currently in effect */
run;

*Program 3: Using PROC UNIVARIATE (p.16);
proc univariate plot data=first_data;
    var y;
run;

*Program 4(a): Missing semicolons in Program 3 (p.19);
/* INTENTIONAL ERROR: the semicolon after data=first_data is omitted on
   purpose to demonstrate the resulting log messages. Do not fix. */
proc univariate plot data=first_data
    var y;
run;

*Program 4(b): Missing semicolons in Program 3 (p.21);
/* INTENTIONAL ERROR: the semicolon after "var y" is omitted on purpose,
   so RUN is read as part of the VAR statement. Do not fix. */
proc univariate plot data=first_data;
    var y
run;

*Program 5: Reading a delimited file (p.22);
data first_data2;
    infile 'C:\Documents\SAS workshop\MyData\datafile.txt';
    input y;
run;

*Program 6: Reading tab delimited file (p.25);
/* firstobs=2 starts reading at the second record, dlm='09'x sets the
   delimiter to the tab character (hex 09). */
data pulse;
    infile 'C:\Users\sjricht2\Box Sync\Consulting\Workshops\SAS Workshop\data.txt'
           firstobs=2 dlm='09'x;
    input Height Weight Age Gender$ Smokes Alcohol Exercise Ran Pulse1 Pulse2 Year;
run;

*Program 7: Reading a csv delimited file (p.26);
/* Replaces the PULSE data set created by Program 6. */
data pulse;
    infile 'C:\Documents\SAS workshop\MyData\data.csv' dlm=',' firstobs=2;
    input Height Weight Age Gender$ Smokes Alcohol Exercise Ran Pulse1 Pulse2 Year;
run;

*Program 8: Exploring the details of a data file (p.33);
proc contents data=pulse;
run;

*Program 9(a): Explore the relationship between Pulse1 and Weight (p.35);
proc sgscatter data=pulse;
    plot pulse1*weight;
run;

*Program 9(b): Add regression line (p.36);
proc sgscatter data=pulse;
    plot pulse1*weight / reg;
run;

*Program 9(c): Add regression line (p.37);
proc reg data=pulse;
    model pulse1=weight;
run;

*Program 9(d): Add residual plot (p.43);
proc reg data=pulse;
    model pulse1=weight;
    plot r.*p.;   /* residuals (r.) against predicted values (p.) */
run;

*Program 9(e): Correlation (p.44);
proc corr data=pulse;
    var weight pulse1;
run;

*Program 10(a): Boxplots of Pulse1 by smoking status (p.45);
/* Run here BEFORE the data are sorted by smokes, Program 10(b) is the
   "redo" after sorting. Kept as is because the contrast is the point. */
proc boxplot data=pulse;
    plot pulse1*smokes;
run;

*Program 10(b): Boxplots of Pulse1 by smoking status redo (p.46);
proc sort data=pulse;
    by smokes;
run;

proc boxplot data=pulse;
    plot pulse1*smokes;
run;

*Program 10(c): Schematic Boxplots (p.47);
/* The FORMAT statement below needs SMOKES_LABEL to exist already, so it
   is defined here, ahead of its first use. */
proc format;
    value smokes_label 1 = 'Yes'
                       2 = 'No';
run;

proc boxplot data=pulse;
    plot pulse1*smokes / boxstyle=schematic;
    format smokes smokes_label.;
run;

*Program 10(d): Boxplots using SGPLOT (p.50);
proc sgplot data=pulse;
    vbox pulse1 / group=smokes;
run;

*Program 11: Creating formats and labels (p.52);
proc format;
    value smokes_label 1 = 'Yes'
                       2 = 'No';
run;

proc sgplot data=pulse;
    vbox pulse1 / group=smokes;
    format smokes smokes_label.;
    label smokes = 'Smoker'
          pulse1 = 'Initial pulse (b/m)';
run;

*Program 12(a): Means and standard deviations (p.54);
proc means data=pulse;
    class smokes;
    var pulse1;
run;

*Program 12(b): Means and standard deviations (p.54);
proc means data=pulse n mean std;
    class smokes;
    var pulse1;
run;

*Program 13: Independent samples t-test (p.56);
proc ttest data=pulse;
    class smokes;
    var pulse1;
run;

*Program 14(a): Frequency table (p.58);
proc freq data=pulse;
    tables ran;
run;

*Program 14(b): Frequency table, test for proportions (p.59);
proc freq data=pulse;
    tables ran / binomial;
run;

*Program 15(a): Crosstabs (p.61);
proc freq data=pulse;
    tables year*ran;
run;

*Program 15(b): Crosstabs, chi-squared tests (p.63);
proc freq data=pulse;
    tables year*ran / chisq;
run;

*Program 16(a): Frequency plots (p.66);
proc freq data=pulse;
    where year=93;
    tables ran / plots=freqplot;
run;

*Program 16(b): Two-way bar charts (p.69);
proc freq data=pulse;
    tables year*ran / plots=freqplot(twoway=cluster);
run;

*Program 17(a): Creating a new variable (p.70);
/* year93 is 'Y' when year equals 93 and 'N' for every other value,
   including a missing year. */
data pulse2;
    set pulse;
    if year=93 then year93='Y';
    else year93='N';
run;

proc freq data=pulse2;
    tables year93;
run;

*Program 17(b): Chi-squared tests (p.71);
proc freq data=pulse2;
    tables year93*Ran / chisq;
run;

*Program 18: Calculating a difference variable (p.72);
/* Rewrites PULSE2 in place, adding the two derived variables. */
data pulse2;
    set pulse2;
    pulse_diff = pulse2-pulse1;
    avg_pulse = mean(pulse1,pulse2);
run;

*Program 19(a): Means and t-test for dependent samples--MEANS procedure (p.73);
proc means data=pulse2 n mean std lclm uclm t probt;
    where ran=1;
    var pulse2 pulse1 pulse_diff;
run;

*Program 19(b): t-test for dependent samples--TTEST procedure (p.74);
proc ttest data=pulse2;
    where ran = 1;
    paired pulse2*pulse1;
run;